import jieba
def cutWord(txt):
    """Segment ``txt`` with ``jieba.lcut`` and return the resulting tokens."""
    tokens = jieba.lcut(txt)
    return tokens

def getTxt(path):
    """Return the entire contents of the UTF-8 encoded text file at ``path``.

    The file is opened inside a ``with`` block so the handle is closed as
    soon as reading finishes, even if ``read()`` raises.

    Raises:
        OSError: if the file cannot be opened (e.g. it does not exist).
        UnicodeDecodeError: if the file is not valid UTF-8.
    """
    with open(path, "r", encoding="UTF-8") as f:
        return f.read()

# Maps each alias token to the canonical name its occurrences are counted under.
_NAME_ALIASES = {
    "诸葛亮": "诸葛亮", "孔明曰": "诸葛亮", "孔明": "诸葛亮",
    "刘备": "刘备", "玄德": "刘备", "玄德曰": "刘备",
    "关羽": "关羽", "关公": "关羽", "云长": "关羽",
}


def countWord(lis):
    """Count token frequencies, merging known aliases of the same person.

    Args:
        lis: iterable of string tokens.

    Returns:
        A dict mapping each counted word to its number of occurrences.
        Single-character tokens are skipped. Tokens listed in
        ``_NAME_ALIASES`` are counted under their canonical name, so each
        person's aliases accumulate under that person's own key rather than
        all being folded into a single name.
    """
    count = {}
    for token in lis:
        if len(token) == 1:
            continue
        # Unknown tokens are counted under themselves.
        rword = _NAME_ALIASES.get(token, token)
        count[rword] = count.get(rword, 0) + 1
    return count

def reject(lis):
    """Remove a fixed set of excluded words from the count dict ``lis`` in place.

    Args:
        lis: dict keyed by word (as produced by ``countWord``). It is mutated;
            nothing is returned.

    Excluded words that are not present in ``lis`` are ignored, so the
    function does not raise ``KeyError`` when the input text happens not to
    contain one of them.
    """
    excludes={"将军","却说","丞相","二人","不可","荆州","不能","如此","商议","如何","主公","军士","左右","军马","次日","引兵","大喜","天下"
    ,"东吴","于是","今日","不敢","魏兵","陛下","人马","都督","一人"
    ,"不知","汉中","众将","只见","后主","蜀兵","夏侯","大叫","上马"
    ,"此人","先主","------------","太守","天子","后人","背后","城中"
    ,"一面","何不","忽报","大军","先生","何故","然后","先锋","夫人"
    ,"不如"}
    for word in excludes:
        # pop with a default tolerates words that never occurred in the text.
        lis.pop(word, None)


def topOutput(wordList,n=5):
    """Print the first ``n`` entries of ``wordList``, one per line.

    Args:
        wordList: a sliceable sequence (e.g. a list) of items to print.
        n: maximum number of entries to print (default 5).

    If ``wordList`` holds fewer than ``n`` entries, all of them are printed
    instead of raising ``IndexError``. A non-positive ``n`` prints nothing.
    """
    # Clamp to zero so a negative n prints nothing instead of slicing from the end.
    for entry in wordList[:max(n, 0)]:
        print(entry)

def main():
    """Run the word-frequency pipeline on ``sanguo.txt``.

    Reads the file, segments it into tokens, counts them, removes the
    excluded words, and prints the 100 highest-count (word, count) pairs in
    descending order of count.
    """
    tokens = cutWord(getTxt("sanguo.txt"))
    counts = countWord(tokens)
    reject(counts)
    ranked = sorted(counts.items(), key=lambda pair: pair[1], reverse=True)
    topOutput(ranked, 100)

# Run the pipeline only when executed as a script, so importing this module
# for its functions does not read sanguo.txt or print anything.
if __name__ == "__main__":
    main()
